This is an analysis of a dataset generated in the lab containing
septum from E11.5 PGK-Cre;Rosa26YFP and E12.5 Dbx1-Cre;Rosa26Tomato
embryos Cells were prepared by Matthieu Moreau & Frédéric
Causeret
Libraries were generated by Matthieu Moreau & Frédéric
Causeret
Sequencing was achieved at the genomics platform of Imagine
Reads were aligned on the mm10 genome to which were added: -
LacZ-SV40
- Tomato-WPRE-bGH
library(Seurat)
library(dplyr)
library(patchwork)
library(cowplot)
library(ggplot2)
library(ggExtra)
library(Matrix)
library(RColorBrewer)
library(viridis)
library(wesanderson)
library(MetBrewer)
# Set ggplot theme as classic
theme_set(theme_classic())# Load the raw filtered_gene_bc_matrix outputed by Cell Ranger
Countdata <- Read10X(data.dir = "/shared/ifbstor1/home/fcauseret/Septum/filtered_gene_bc_matrices/")
# Initialize the Seurat object
Septum <- CreateSeuratObject(counts = Countdata,
min.cells = 3,
min.features = 800,
project = "Septum")
Septum$Barcodes <- colnames(Septum)
dim(Septum)## [1] 17280 5825
rm(Countdata)# Percent of mitochondrial counts
Septum[["percent.mt"]] <- PercentageFeatureSet(Septum, pattern = "^mt-")
# Percent of ribosomal counts
Septum[["percent.rb"]] <- PercentageFeatureSet(Septum, pattern = "(^Rpl|^Rps|^Mrp)")# Filter cells based on these thresholds
Cell.QC.Stat <- Septum@meta.data
max.mito.thr <- median(Cell.QC.Stat$percent.mt) + 3*mad(Cell.QC.Stat$percent.mt)
min.mito.thr <- median(Cell.QC.Stat$percent.mt) - 3*mad(Cell.QC.Stat$percent.mt)
Cell.QC.Stat <- Cell.QC.Stat %>% filter(percent.mt < max.mito.thr) %>% filter(percent.mt > min.mito.thr)
Septum@meta.data$Cell.quality <- ifelse(Septum@meta.data$percent.mt > min.mito.thr & Septum@meta.data$percent.mt < max.mito.thr, "High Quality", "Low Quality")
table(Septum$Cell.quality)##
## High Quality Low Quality
## 5444 381
rm(Cell.QC.Stat, max.mito.thr, min.mito.thr)#Violin plot
VlnPlot(Septum, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 4, group.by="Cell.quality")# Relation between nCount_RNA and nFeatures_RNA detected with cell quality parameter
p1 <- ggplot(Septum@meta.data, aes(x=nCount_RNA, y=nFeature_RNA)) + geom_point(aes(color=Cell.quality), size=0.1) + geom_smooth(method="lm")
p1 <- ggMarginal(p1, type = "histogram", fill="lightgrey")
p2 <- ggplot(Septum@meta.data, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) + geom_point(aes(color=Cell.quality), size=0.1) + geom_smooth(method="lm")
p2 <- ggMarginal(p2, type = "histogram", fill="lightgrey")
# Relation between nFeatures_RNA and the mitochondrial RNA percentage detected with cell quality parameter
p3 <- ggplot(Septum@meta.data, aes(x=nFeature_RNA, y=percent.mt, color=Cell.quality)) + geom_point(size=0.1)
p3 <- ggMarginal(p3, type = "histogram", fill="lightgrey", bins=100)
plot_grid(plotlist = list(p1,p2,p3), ncol=3, align='h', rel_widths = c(1, 1, 1))rm(p1, p2, p3)# Assign cell-cycle scores
s.genes <- c("Mcm5", "Pcna", "Tym5", "Fen1", "Mcm2", "Mcm4", "Rrm1", "Ung", "Gins2", "Mcm6", "Cdca7", "Dtl", "Prim1", "Uhrf1", "Mlf1ip", "Hells", "Rfc2", "Rap2", "Nasp", "Rad51ap1", "Gmnn", "Wdr76", "Slbp", "Ccne2", "Ubr7", "Pold3", "Msh2", "Atad2", "Rad51", "Rrm2", "Cdc45", "Cdc6", "Exo1", "Tipin", "Dscc1", "Blm", " Casp8ap2", "Usp1", "Clspn", "Pola1", "Chaf1b", "Brip1", "E2f8")
g2m.genes <- c("Hmgb2", "Ddk1","Nusap1", "Ube2c", "Birc5", "Tpx2", "Top2a", "Ndc80", "Cks2", "Nuf2", "Cks1b", "Mki67", "Tmpo", " Cenpk", "Tacc3", "Fam64a", "Smc4", "Ccnb2", "Ckap2l", "Ckap2", "Aurkb", "Bub1", "Kif11", "Anp32e", "Tubb4b", "Gtse1", "kif20b", "Hjurp", "Cdca3", "Hn1", "Cdc20", "Ttk", "Cdc25c", "kif2c", "Rangap1", "Ncapd2", "Dlgap5", "Cdca2", "Cdca8", "Ect2", "Kif23", "Hmmr", "Aurka", "Psrc1", "Anln", "Lbr", "Ckap5", "Cenpe", "Ctcf", "Nek2", "G2e3", "Gas2l3", "Cbx5", "Cenpa")
Septum <- CellCycleScoring(Septum,
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = T)
table(Septum$Phase)##
## G1 G2M S
## 2691 1376 1758
rm(s.genes, g2m.genes)# LogNormalize the gene expression matrix (global-scaling normalization method)
Septum <- NormalizeData(Septum, normalization.method = "LogNormalize", scale.factor = 10000)Septum <- FindVariableFeatures(Septum, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top20 <- head(VariableFeatures(Septum), 20)
# Plot variable features with and without labels
plot1 <- VariableFeaturePlot(Septum) + theme(legend.position = "top")
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T) + theme(legend.position = "none")
plot_grid(plotlist = list(plot1,plot2), ncol=2, align='h', rel_widths = c(1, 1))rm(top20, plot1, plot2)# Linear transformation : Pre-processing step for dimensional reduction like PCA
Septum <- ScaleData(Septum)Septum <- RunPCA(Septum, features = VariableFeatures(object = Septum))
# Examine and visualize PCA results a few different ways
print(Septum[["pca"]], dims = 1:5, nfeatures = 5)## PC_ 1
## Positive: Mllt11, Tagln3, Rtn1, Tubb3, Tuba1a
## Negative: Hmgb2, Anp32b, Tuba1b, Dek, Phgdh
## PC_ 2
## Positive: Alas2, Gypa, Car2, Acp5, Rhd
## Negative: Tubb2b, Pkm, Set, Map1b, Tuba1a
## PC_ 3
## Positive: Fgf8, Fgf17, Serpinh1, Sparc, Zic4
## Negative: Alas2, Gypa, Pantr1, Car2, Efnb1
## PC_ 4
## Positive: Arl6ip1, Cenpf, Ube2c, Nusap1, Prc1
## Negative: Ung, Pkm, Slc25a5, Mcm2, Mcm6
## PC_ 5
## Positive: Cdkn1c, Hes6, Mfng, Dlk1, Rgs16
## Negative: Socs2, Gm3764, Wnt7b, Gas1, Thra
VizDimLoadings(Septum, dims = 1:2, reduction = "pca")DimHeatmap(Septum, dims = 1:6, cells = 500, balanced = TRUE)# More approximate techniques such as those implemented in ElbowPlot() can be used to reduce computation time
Septum <- JackStraw(Septum, num.replicate = 100)
Septum <- ScoreJackStraw(Septum, dims = 1:20)
# JackStrawPlot
JackStrawPlot(Septum, dims = 1:20)#ElbowPlot
ElbowPlot(Septum)Septum <- RunUMAP(Septum, dims = 1:20)
DimPlot(Septum, reduction = "umap", label = TRUE, label.size = 3, pt.size = 0.1) + NoAxes()Septum <- FindNeighbors(Septum, dims = 1:20)
Septum <- FindClusters(Septum, resolution = 1.5)## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 181940
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8266
## Number of communities: 24
## Elapsed time: 0 seconds
# Visualize clusters
DimPlot(Septum, reduction = "umap", label = TRUE, label.size = 3, pt.size = 0.1) + NoAxes() + ggtitle(paste(dim(Septum)[2], " septum cells")) DimPlot(Septum, reduction = "umap", label = TRUE, label.size = 3, pt.size = 0.1, group.by="Cell.quality") + NoAxes()FeaturePlot(Septum, features = c("Hba-a1", "Col3a1"), order = T,
cols = c("grey90", brewer.pal(9,"YlOrRd"))) & NoLegend() & NoAxes()Septum <- subset(Septum, idents = c(8, 20, 22), invert = TRUE)
DimPlot(Septum, reduction = "umap", label = TRUE, label.size = 3, pt.size = 0.1) + NoAxes() + ggtitle(paste(dim(Septum)[2], " cells after filtering")) # Generate Spring dimensionality reduction
ExprsMatrix <- as.matrix(GetAssayData(Septum))
exprData <- Matrix(ExprsMatrix, sparse = TRUE)
writeMM(exprData, "ExprData.mtx")## NULL
Genelist <- row.names(ExprsMatrix)
write.table(Genelist, "Genelist.csv", sep="\t", col.names = F, row.names = F)
rm(ExprsMatrix, exprData, Genelist)S.Score <- c("S.Score",Septum@meta.data$S.Score)
S.Score <- paste(S.Score, sep=",", collapse=",")
G2M.Score <- c("G2M.Score",Septum@meta.data$G2M.Score)
G2M.Score <- paste(G2M.Score, sep=",", collapse=",")
Percent.mt <- c("Percent.mt", Septum$percent.mt)
Percent.mt <- paste(Percent.mt, sep = ",", collapse = ",")
Percent.rb <- c("Percent.rb", Septum$percent.rb)
Percent.rb <- paste(Percent.rb, sep = ",", collapse = ",")
nCount <- c("nCount", Septum$nCount_RNA)
nCount <- paste(nCount, sep = ",", collapse = ",")
nFeature <- c("nFeature", Septum$nFeature_RNA)
nFeature <- paste(nFeature, sep = ",", collapse = ",")
ColorTrack <- rbind(S.Score, G2M.Score, Percent.mt, Percent.rb, nCount, nFeature)
write.table(ColorTrack, "ColorTrack.csv", quote =F, row.names = F, col.names = F)
rm(S.Score, G2M.Score, Percent.mt, Percent.rb, nCount, nFeature, ColorTrack)Seurat.clusters <- c("Seurat Clusters", paste0("Cluster",as.character(Septum@meta.data$seurat_clusters)))
Seurat.clusters <- paste(Seurat.clusters, sep=",", collapse=",")
Phase <- c("Phase", Septum@meta.data$Phase)
Phase <- paste(Phase, sep=",", collapse=",")
Quality <- c("Cell Quality", Septum$Cell.quality)
Quality <- paste(Quality, sep = ",", collapse = ",")
Cellgrouping <- rbind(Seurat.clusters, Phase, Quality)
write.table(Cellgrouping, "Cellgrouping.csv", quote =F, row.names = F, col.names = F)
rm(Cellgrouping, Seurat.clusters, Phase, Quality)ExprData.mtx, Genelist.csv, ColorTrack.csv and Cellgrouping.csv are then used as input for the Spring App Cell coordinates of the Spring dimensionality reduction as well as doublet score are then downloaded
doublet.score <- read.table("/shared/ifbstor1/home/fcauseret/Septum/doublet_results.tsv", header = T)
doublet.score <- filter(doublet.score, Observed_or_Simulated == "Observed")
rownames(doublet.score) <- Septum$Barcodes
Septum@meta.data$doublet.score <-doublet.score$Score
VlnPlot(object = Septum, features = "doublet.score", pt.size = 0.2) + geom_hline(yintercept = 0.4, linetype="dashed") + FeaturePlot(Septum, features = c("doublet.score"), order = T,
cols = c("grey90", brewer.pal(9,"YlOrRd")))Septum <- subset(Septum, subset = doublet.score < 0.4)
DimPlot(Septum, reduction = "umap", label = TRUE, label.size = 3, pt.size = 0.1) + NoAxes() + ggtitle(paste(dim(Septum)[2], " cells after doublets removal")) Coordinates <- read.table("/shared/ifbstor1/home/fcauseret/Septum/coordinates.txt", sep = ",", header = F)[,c(2,3)]
rownames(Coordinates) <- Septum$Barcodes
colnames(Coordinates) <- paste0("Spring_", 1:2)
# We will now store this as a custom dimensional reduction : spring
Septum[["Spring"]] <- CreateDimReducObject(embeddings = as.matrix(Coordinates), key = "Spring_", assay = DefaultAssay(Septum))
# Symmetry transform of the coordinates
Spring.Sym <- function(x){
x = abs(max(Coordinates[,2])-x)
}
Coordinates[,2] <- sapply(Coordinates[,2] , function(x) Spring.Sym(x))
Septum[["Spring"]] <- CreateDimReducObject(embeddings = as.matrix(Coordinates), key = "Spring_", assay = DefaultAssay(Septum))
rm(Coordinates, doublet.score)
# Spring visualization
DimPlot(Septum, reduction = "Spring", pt.size = 0.2, label = T, label.size = 3) + NoAxes() + NoLegend()FeaturePlot(Septum,
features = c("Eomes", "Tbr1", "Isl1", "Trp73", "Onecut2", "Dbx1", "dtTomato", "Pcdh8", "Spon1"),
reduction = "Spring",
ncol = 3,
order = T,
cols = c("grey90", brewer.pal(9,"YlGnBu"))) & NoLegend() & NoAxes()saveRDS(Septum, "/shared/ifbstor1/home/fcauseret/Septum/Septum.RDS")#date
format(Sys.time(), "%d %B, %Y, %H,%M")## [1] "09 April, 2022, 19,11"
#Packages used
sessionInfo()## R version 4.1.1 (2021-08-10)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.1.1/lib/libopenblasp-r0.3.18.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MetBrewer_0.2.0 wesanderson_0.3.6 viridis_0.6.2 viridisLite_0.4.0
## [5] RColorBrewer_1.1-2 Matrix_1.3-4 ggExtra_0.9 ggplot2_3.3.5
## [9] cowplot_1.1.1 patchwork_1.1.1 dplyr_1.0.7 SeuratObject_4.0.4
## [13] Seurat_4.1.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-3 deldir_1.0-6
## [4] ellipsis_0.3.2 ggridges_0.5.3 rstudioapi_0.13
## [7] spatstat.data_2.1-2 farver_2.1.0 leiden_0.3.9
## [10] listenv_0.8.0 ggrepel_0.9.1 fansi_1.0.2
## [13] codetools_0.2-18 splines_4.1.1 knitr_1.37
## [16] polyclip_1.10-0 jsonlite_1.7.3 ica_1.0-2
## [19] cluster_2.1.2 png_0.1-7 uwot_0.1.11
## [22] shiny_1.7.1 sctransform_0.3.3 spatstat.sparse_2.1-0
## [25] compiler_4.1.1 httr_1.4.2 assertthat_0.2.1
## [28] fastmap_1.1.0 lazyeval_0.2.2 cli_3.2.0
## [31] later_1.3.0 htmltools_0.5.2 tools_4.1.1
## [34] igraph_1.2.11 gtable_0.3.0 glue_1.6.2
## [37] RANN_2.6.1 reshape2_1.4.4 Rcpp_1.0.8.3
## [40] scattermore_0.7 jquerylib_0.1.4 vctrs_0.3.8
## [43] nlme_3.1-153 lmtest_0.9-39 xfun_0.30
## [46] stringr_1.4.0 globals_0.14.0 mime_0.12
## [49] miniUI_0.1.1.1 lifecycle_1.0.1 irlba_2.3.5
## [52] goftest_1.2-3 future_1.23.0 MASS_7.3-54
## [55] zoo_1.8-9 scales_1.1.1 spatstat.core_2.3-2
## [58] promises_1.2.0.1 spatstat.utils_2.3-0 parallel_4.1.1
## [61] yaml_2.2.2 reticulate_1.22 pbapply_1.5-0
## [64] gridExtra_2.3 sass_0.4.0 rpart_4.1-15
## [67] stringi_1.7.6 highr_0.9 rlang_1.0.1
## [70] pkgconfig_2.0.3 matrixStats_0.61.0 evaluate_0.14
## [73] lattice_0.20-45 ROCR_1.0-11 purrr_0.3.4
## [76] tensor_1.5 labeling_0.4.2 htmlwidgets_1.5.4
## [79] tidyselect_1.1.1 parallelly_1.30.0 RcppAnnoy_0.0.19
## [82] plyr_1.8.6 magrittr_2.0.2 R6_2.5.1
## [85] generics_0.1.1 DBI_1.1.2 withr_2.4.3
## [88] mgcv_1.8-38 pillar_1.7.0 fitdistrplus_1.1-6
## [91] survival_3.2-13 abind_1.4-5 tibble_3.1.6
## [94] future.apply_1.8.1 crayon_1.5.0 KernSmooth_2.23-20
## [97] utf8_1.2.2 spatstat.geom_2.3-1 plotly_4.10.0
## [100] rmarkdown_2.11 grid_4.1.1 data.table_1.14.2
## [103] digest_0.6.29 xtable_1.8-4 tidyr_1.1.4
## [106] httpuv_1.6.5 munsell_0.5.0 bslib_0.3.1
IPNP & Imagine Institute, Paris, France, frederic.causeret@inserm.fr↩︎